{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### This tutorial is deprecated, please see the new [Knet/tutorial](https://github.com/denizyuret/Knet.jl)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "using Pkg; for p in (\"Knet\",\"Plots\",\"Images\"); haskey(Pkg.installed(),p) || Pkg.add(p); end\n",
    "using Knet, Plots, Images, Random"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "# Linear regression example with housing data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [],
   "source": [
    "# Download the housing dataset from the UCI Machine Learning Repository\n",
    "include(Knet.dir(\"data\",\"housing.jl\"))\n",
    "x,y = housing()\n",
    "map(summary,(x,y))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define loss\n",
    "predict(w,x) = w[1]*x .+ w[2]\n",
    "loss(w,x,y) = mean(abs2,y-predict(w,x))\n",
    "lossgradient = grad(loss);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize model\n",
    "Random.seed!(42)\n",
    "w = [ 0.1*rand(1,13), 0.0 ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "loss(w,x,y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "lossgradient(w,x,y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Testing the gradient: Increasing w[2] by eps should decrease loss by 45.0656 * eps\n",
    "w[2]=0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "loss(w,x,y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# SGD training loop\n",
    "function train!(w, data; lr=.1)\n",
    "    for (x,y) in data\n",
    "        dw = lossgradient(w, x, y)\n",
    "        for i in 1:length(w)\n",
    "            w[i] -= lr * dw[i]\n",
    "        end\n",
    "    end\n",
    "    return w\n",
    "end"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Record the weights for 10 epochs\n",
    "@time weights = [ copy(train!(w, [(x, y)])) for epoch=1:10 ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "losses = [ loss(w,x,y) for w in weights ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "plot(losses,xlabel=\"Epochs\",ylabel=\"Loss\") "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load and minibatch MNIST data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "include(Knet.dir(\"data\",\"mnist.jl\"))\n",
    "xtrn,ytrn,xtst,ytst = mnist()\n",
    "Atype = gpu() >= 0 ? KnetArray{Float32} : Array{Float32}\n",
    "dtst = minibatch(xtst,ytst,100;xtype=Atype); # [ (x1,y1), (x2,y2), ... ] where xi,yi are minibatches of 100\n",
    "dtrn = minibatch(xtrn,ytrn,100;xtype=Atype); # [ (x1,y1), (x2,y2), ... ] where xi,yi are minibatches of 100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# dtrn and dtst are iterables of (x,y) minibatches, each minibatch contains 100 instances\n",
    "length(dtrn),length(dtst)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Take a look at the first three test images and labels\n",
    "(x,y) = first(dtst)\n",
    "ax = Array(x)\n",
    "for i=1:3; display(mnistview(ax,i)); end\n",
    "y[1:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Softmax classification example with MNIST"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define softmax loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define loss\n",
    "predict(w,x) = w[1]*mat(x) .+ w[2]  # Same as linreg except we need mat() to convert input 4D->2D before matmul\n",
    "loss(w,x,ygold) = nll(predict(w,x),ygold); # nll is negative log likelihood"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize model\n",
    "wsoft=map(Atype, [ 0.1*randn(10,784), zeros(10,1) ]);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Average loss for a single (x,y) minibatch\n",
    "loss(wsoft, x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Average loss for the whole test set\n",
    "nll(wsoft,dtst,predict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Accuracy for the whole test set\n",
    "accuracy(wsoft,dtst,predict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train softmax model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@time softmodels = [ copy(train!(wsoft, dtrn)) for epoch=1:60 ];  # ~17 seconds"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plot softmax learning curve"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@time trnsoftloss = [ nll(w,dtrn,predict) for w in softmodels ];  # ~13 seconds\n",
    "@time tstsoftloss = [ nll(w,dtst,predict) for w in softmodels ];  # ~2 seconds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot([trnsoftloss tstsoftloss],ylim=(.2,.36),labels=[:trnsoftloss :tstsoftloss],xlabel=\"Epochs\",ylabel=\"Loss\") "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plot softmax error rate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@time trnsofterr = [ 1-accuracy(w,dtrn,predict) for w in softmodels ];  # ~12 seconds\n",
    "@time tstsofterr = [ 1-accuracy(w,dtst,predict) for w in softmodels ];  # ~2 seconds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "plot([trnsofterr tstsofterr],ylim=(.06,.10),labels=[:trnsofterr :tstsofterr],xlabel=\"Epochs\",ylabel=\"Error\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cleanup\n",
    "wsoft = softmodels = nothing; Knet.gc()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Multilayer perceptron example with MNIST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# We only need to change the predict function!\n",
    "function predict(w,x)\n",
    "    for i=1:2:length(w)\n",
    "        x = w[i]*mat(x) .+ w[i+1]\n",
    "        if i<length(w)-1\n",
    "            x = max.(0,x)                         \n",
    "        end\n",
    "    end\n",
    "    return x\n",
    "end"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "wmlp=map(Atype, [ 0.1*randn(64,784), zeros(64,1), \n",
    "                  0.1*randn(10,64),  zeros(10,1) ])\n",
    "loss(wmlp, x, y)  # average loss for random model should be close to log(10)=2.3026"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train MLP model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@time mlpmodels = [ copy(train!(wmlp, dtrn)) for epoch=1:60 ]; # ~20 seconds"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compare MLP loss with softmax loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@time trnmlploss = [ nll(w,dtrn,predict) for w in mlpmodels ]; # ~12 seconds\n",
    "@time tstmlploss = [ nll(w,dtst,predict) for w in mlpmodels ]; # ~2 seconds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot([trnsoftloss tstsoftloss trnmlploss tstmlploss],ylim=(.0,.36),labels=[:trnsoftloss :tstsoftloss :trnmlploss :tstmlploss],xlabel=\"Epochs\",ylabel=\"Loss\") "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compare MLP error with softmax error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@time trnmlperr = [ 1-accuracy(w,dtrn,predict) for w in mlpmodels ]; # ~13 seconds\n",
    "@time tstmlperr = [ 1-accuracy(w,dtst,predict) for w in mlpmodels ]; # ~2 seconds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "plot([trnsofterr tstsofterr trnmlperr tstmlperr],ylim=(.0,.10),labels=[:trnsofterr :tstsofterr :trnmlperr :tstmlperr],xlabel=\"Epochs\",ylabel=\"Error\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cleanup\n",
    "wmlp = mlpmodels = nothing; Knet.gc()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# CNN example with MNIST (The LeNet model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# We only need to change the predict function!\n",
    "function predict(w,x) # LeNet model\n",
    "    n=length(w)-4\n",
    "    for i=1:2:n\n",
    "        x = pool(relu.(conv4(w[i],x) .+ w[i+1]))\n",
    "    end\n",
    "    for i=n+1:2:length(w)-2\n",
    "        x = relu.(w[i]*mat(x) .+ w[i+1])\n",
    "    end\n",
    "    return w[end-1]*x .+ w[end]\n",
    "end"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "wcnn=map(Atype, [ 0.1*randn(5,5,1,20),  zeros(1,1,20,1), \n",
    "                  0.1*randn(5,5,20,50), zeros(1,1,50,1),\n",
    "                  0.1*randn(500,800),  zeros(500,1),\n",
    "                  0.1*randn(10,500),  zeros(10,1) ])\n",
    "loss(wcnn, x, y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train CNN model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@time cnnmodels = [ copy(train!(wcnn, dtrn)) for epoch=1:60 ]; # ~127 seconds"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compare CNN loss with MLP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@time trncnnloss = [ nll(w,dtrn,predict) for w in cnnmodels ]; # ~48 seconds\n",
    "@time tstcnnloss = [ nll(w,dtst,predict) for w in cnnmodels ]; # ~8 seconds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot([trnsoftloss tstsoftloss trnmlploss tstmlploss trncnnloss tstcnnloss],ylim=(.0,.36),labels=[:trnsoftloss :tstsoftloss :trnmlploss :tstmlploss :trncnnloss :tstcnnloss],xlabel=\"Epochs\",ylabel=\"Loss\") "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compare CNN error with MLP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@time trncnnerr = [ 1-accuracy(w,dtrn,predict) for w in cnnmodels ]; # ~48 seconds\n",
    "@time tstcnnerr = [ 1-accuracy(w,dtst,predict) for w in cnnmodels ]; # ~8 seconds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "plot([trnsofterr tstsofterr trnmlperr tstmlperr trncnnerr tstcnnerr],ylim=(.0,.10),labels=[:trnsofterr :tstsofterr :trnmlperr :tstmlperr :trncnnerr :tstcnnerr],xlabel=\"Epochs\",ylabel=\"Error\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cleanup\n",
    "wcnn = cnnmodels = nothing; Knet.gc()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Shakespeare example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Please see charlm.ipynb for training a character based language model on \"The Complete Works of William Shakespeare\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# VGG example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "include(Knet.dir(\"examples/vgg/vgg.jl\"));"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "caturl = \"https://github.com/BVLC/caffe/raw/master/examples/images/cat.jpg\"\n",
    "catfile = download(caturl)\n",
    "load(catfile)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "VGG.main(catfile)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "VGG.main(\"https://cvimg1.cardekho.com/p/237x156/in/mahindra/torro-25/mahindra-torro-25.jpg\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Julia 1.0.0",
   "language": "julia",
   "name": "julia-1.0"
  },
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "1.0.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
